library(MASS)
library(nnet)
library(gbm)

this.seed<-round(runif(1)*10^8)
set.seed(this.seed)

source("CBPSMain.R")
source("CBPSContinuous.r")
source("npCBPS.R")

getbal<-function(dat, covars, wts){
  out <- rep(0,1 + 2*ncol(covars))
  out[1:ncol(covars)] <- coef(summary(lm(treat ~ covars, data = dat, weights = wts)))[-1, "t value"]^2
  for (i in 1:ncol(covars)){
    out[ncol(covars)+i] <- summary(lm(treat ~ covars[,i], data = dat, weights = wts))$fstatistic[1]
  }
  out[2*ncol(covars) + 1] <- summary(lm(treat ~ covars, data = dat, weights = wts))$fstatistic[1]
  return(out)
}

F.aac.iter=function(i,ps.model,data,ps.num,rep,criterion) { 
  
  GBM.fitted=predict(ps.model,newdata=data,n.trees=floor(i),type="response") 
  ps.den=dnorm((data$T-GBM.fitted)/sd(data$T-GBM.fitted),0,1)
  wt=ps.num/ps.den
  aac_iter=rep(NA,rep)
  for (i in 1:rep){
    bo=sample(1:dim(data)[1],replace=TRUE,prob=wt)
    newsample=data[bo,]
    j.drop=match(c("T"),names(data))
    j.drop=j.drop[!is.na(j.drop)]
    x=newsample[,-j.drop]
    if(criterion=="spearman"|criterion=="kendall"){
      ac=apply(x, MARGIN=2, FUN=cor, y=newsample$T,method=criterion)
    } else if (criterion=="distance"){
      ac=apply(x, MARGIN=2, FUN=dcor, y=newsample$T) 
    } else if (criterion=="pearson"){
      ac=matrix(NA,dim(x)[2],1)
      for (j in 1:dim(x)[2]){
        ac[j]=ifelse (!is.factor(x[,j]), cor(newsample$T, x[,j], 
                                             method=criterion),polyserial(newsample$T, x[,j]))
      }
    } else print("The criterion is not correctly specified")
    aac_iter[i]=mean(abs(1/2*log((1+ac)/(1-ac))),na.rm=TRUE)
  }
  aac=mean(aac_iter)
  return(aac)
}


niters<-1000

load("FECForCEM.RData")
orig.x<-x[,c("TotAds","TotalPop", "PercentOver65", "Inc", "PercentHispanic","PercentBlack", "density", 
             "per_hsgrads","per_collegegrads", "StFIPS", "CanCommute", "Treatment1", "Cont", "MedianHHInc")]
rm(x)
  
x<-orig.x[sample(1:nrow(orig.x),nrow(orig.x),replace = TRUE),]
max.norm.cor<-cor(qqnorm(x$TotAds)$x,qqnorm(x$TotAds)$y)
best.bc<-x$TotAds
best.lambda<-1
for (lambda in seq(-2,2,0.01))
{
  if (lambda != 0)
  {
    bc.totads<-((x$TotAds+1)^lambda - 1)/lambda
  }
  else
  {
    bc.totads<-log(x$TotAds+1)
  }
  norm.cor<-cor(qqnorm(bc.totads)$x,qqnorm(bc.totads)$y)   
  if (norm.cor > max.norm.cor)
  {
    max.norm.cor<-norm.cor
    best.bc<-bc.totads
    best.lambda<-lambda
  }
}
  
x$treat<-best.bc

pscore.form <- (treat ~ log(TotalPop) + PercentOver65 + Inc + PercentHispanic + PercentBlack + density + 
                per_collegegrads + CanCommute + I(log(TotalPop)^2) + I(PercentOver65^2) + I(Inc^2) + 
                I(PercentHispanic^2) + I(PercentBlack^2) + I(density^2) + I(per_collegegrads^2))

covars <- model.matrix(~ -1 + log(TotalPop) + PercentOver65 + Inc + PercentHispanic + PercentBlack + density + 
             per_collegegrads + CanCommute + I(log(TotalPop)^2) + I(PercentOver65^2) + I(Inc^2) + 
             I(PercentHispanic^2) + I(PercentBlack^2) + I(density^2) + I(per_collegegrads^2), data = x)

pscorefit.mle<-lm(pscore.form, data = x, x = TRUE)
mle.sigmasq<-mean((x$treat - fitted(pscorefit.mle))^2)
mle.stabilizers<-sapply(x$treat, function(t) mean(dnorm(t, mean = fitted(pscorefit.mle), sd = sqrt(mle.sigmasq))))
pscorefit.mle.weights<-mle.stabilizers/dnorm(x$treat, mean = fitted(pscorefit.mle), sd = sqrt(mle.sigmasq))
pscorefit.exact<-CBPS(pscore.form, data = x, twostep = TRUE, method = "exact")
pscorefit.np<-npCBPS(pscore.form, data = x, corprior=0.1/nrow(x))

mydata=data.frame(T=x$treat,X=covars)
model.num=lm(T~1,data=mydata)
ps.num=dnorm((mydata$T-model.num$fitted)/(summary(model.num))$sigma,0,1)
model.den=gbm(T~.,data=mydata, shrinkage = 0.0005, interaction.depth=4, distribution="gaussian",n.trees=20000)
opt=optimize(F.aac.iter,interval=c(1,20000),ps.model=model.den,data=mydata,ps.num=ps.num,rep=50,criterion="pearson")
best.aac.iter=opt$minimum
best.aac=opt$objective
model.den$fitted=predict(model.den,newdata=mydata,n.trees=floor(best.aac.iter),type="response")   
ps.den=dnorm((mydata$T-model.den$fitted)/sd(mydata$T-model.den$fitted),0,1)
weight.gbm=ps.num/ps.den

x$mlew <- pscorefit.mle.weights
x$gbmw <- weight.gbm
x$exactw <- pscorefit.exact$weights
x$npw <- pscorefit.np$weights


baseline.balstats <- getbal(x, covars, rep(1, nrow(x)))
mle.balstats <- getbal(x, covars, pscorefit.mle.weights)
gbm.balstats <- getbal(x, covars, weight.gbm)
exact.balstats <- getbal(x, covars, pscorefit.exact$weights)
np.balstats <- getbal(x, covars, pscorefit.np$weights)

f.baseline <- summary(lm(pscore.form, data = x))$fstatistic[1]
f.mle <- summary(lm(pscore.form, data = x, 
                    weights = pscorefit.mle.weights))$fstatistic[1]
f.gbm <- summary(lm(pscore.form, data = x,
                    weights = weight.gbm))$fstatistic[1]
f.exact <- summary(lm(pscore.form, data = x,
                      weights = pscorefit.exact$weights))$fstatistic[1]
f.np <- summary(lm(pscore.form, data = x,
                   weights = pscorefit.np$weights))$fstatistic[1]
  
fit.outcome<-function(dat, wts, best.lambda){
  outfit<-lm(Cont ~ treat + I(treat^2) + factor(StFIPS), data = x, weights=wts)
  
  dose <- ifelse(best.lambda != 0, ((1000+1)^best.lambda-1)/best.lambda, log(1000+1))
  dose.eff <- coef(outfit)[2]*dose + coef(outfit)[3]*dose^2
  
  return(dose.eff)
}
  
eff.mle <- fit.outcome(x, x$mlew, best.lambda)
eff.np <- fit.outcome(x, x$npw, best.lambda)
eff.exact <- fit.outcome(x, x$exactw, best.lambda)
eff.gbm <- fit.outcome(x, x$gbmw, best.lambda)
  
out<-c(this.seed, baseline.balstats, mle.balstats, exact.balstats, 
       np.balstats, gbm.balstats, eff.mle, eff.exact, eff.np, eff.gbm)

write.csv(out,file=paste0("DollarsBootNP",this.seed,".csv"))

